Data Source

Source: Replication Data for “Disparities in PM2.5 air pollution in the United States”

About the Data

These data show air pollution, via the concentrations of fine particulate matter that is less than 2.5 micrometers in diameter (PM2.5), at each census tract. PM2.5 concentrations are measured by the number of micrograms per cubic meter. High concentrations of PM2.5 indicate higher levels of air pollution.

The data provides PM2.5 concentrations at every year from 1981-2016.

Variable Descriptions

glimpse(select(airquality, statefp, countyfp, tract, 
               pm2_5_1981, pm2_5_2016, pm_change_1981_2016,
               percentile_1981, percentile_2016,
               pctile_change_1981_2016))
## Rows: 16
## Columns: 9
## $ statefp                 <dbl> 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51…
## $ countyfp                <chr> "001", "001", "001", "001", "001", "001", "001…
## $ tract                   <chr> "090100", "980100", "090200", "090300", "09040…
## $ pm2_5_1981              <dbl> 26.81724, 26.81429, 24.63474, 24.26249, 23.059…
## $ pm2_5_2016              <dbl> 7.500889, 7.500028, 6.864208, 6.771635, 6.4862…
## $ pm_change_1981_2016     <dbl> -19.31635, -19.31426, -17.77053, -17.49085, -1…
## $ percentile_1981         <dbl> 56.2, 56.1, 27.2, 22.6, 13.5, 21.0, 11.2, 15.2…
## $ percentile_2016         <dbl> 54.6, 54.5, 20.1, 17.9, 12.7, 18.1, 10.7, 15.1…
## $ pctile_change_1981_2016 <dbl> -1.6, -1.6, -7.1, -4.7, -0.8, -2.9, -0.5, -0.1…

Observations are census tract estimates of:

  • PM2.5 levels in 1981 through 2016 (pm2_5_1981-pm2_5_2016)
  • Percentile rankings in 1981 and 2016 (percentile_1981 and percentile_2016)
    • Percentile rankings were calculated among census tracts within the Commonwealth and were not altered once the data were filtered to just the Charlottesville region.
  • Change in PM2.5 level between 1981 and 2016 (pm_change_1981_2016)
  • Change in percentile rank between 1981 and 2016 (pctile_change_1981_2016)

Summaries

Five-number summaries of all variables:

airquality %>% select(statefp, countyfp, tract, 
               pm2_5_1981, pm2_5_2016, pm_change_1981_2016,
               percentile_1981, percentile_2016,
               pctile_change_1981_2016) %>% 
  as.data.frame() %>% 
  stargazer(., type = "text", title = "Summary Statistics", digits = 1,
            summary.stat = c("mean", "sd", "min", "median", "max"))
## 
## Summary Statistics
## =========================================================
## Statistic               Mean  St. Dev.  Min  Median  Max 
## ---------------------------------------------------------
## statefp                 51.0    0.0     51     51    51  
## pm2_5_1981              23.3    2.0    20.2   23.2  26.8 
## pm2_5_2016               6.6    0.5     5.9   6.5    7.5 
## pm_change_1981_2016     -16.7   1.5    -19.3 -16.7  -14.3
## percentile_1981         19.4    16.3    4.1   14.0  56.2 
## percentile_2016         18.3    15.0    6.1   13.2  54.6 
## pctile_change_1981_2016 -1.1    3.0    -7.1   -0.7   2.4 
## ---------------------------------------------------------

Visual Distributions

Tract distributions of PM2.5 in 1981 and 2016:

airquality %>% select(tract, pm2_5_1981, pm2_5_2016) %>% 
  pivot_longer(-tract, names_to = "measure", values_to = "value") %>% 
  ggplot(aes(x = value, fill = measure)) + 
  geom_histogram() + 
  facet_wrap(~measure, scales = "free") +
  xlab("PM2.5") +
  scale_fill_discrete(labels = c("PM2.5 in 1981", "PM2.5 in 2016"))

meta %>% 
  filter(varname %in% c("pm2_5_1981", "pm2_5_2016")) %>%
  mutate(label = paste0(varname, ": ", about)) %>% 
  select(label) %>% 
  as.list()
## $label
## [1] "pm2_5_1981: Concentration of PM2.5 in 1981"
## [2] "pm2_5_2016: Concentration of PM2.5 in 2016"

Tract-level change from 1981-2016

p <- airquality %>% 
  select(-c(percentile_1981, percentile_2016, 
            pm_change_1981_2016, pctile_change_1981_2016)) %>% 
  pivot_longer(cols = starts_with("pm2"), 
               names_to = "year", values_to = "pm2_5",
               names_prefix = "pm2_5_") %>% 
  ggplot(aes(x = year, y = pm2_5, color = countyfp)) +
  geom_line(aes(group = tract)) +
  guides(color = "none")
ggplotly(p) %>% layout(showlegend = FALSE)

Percentile in 1981 vs. percentile in 2016

airquality %>% 
  ggplot() +
  geom_point(aes(x=percentile_1981, y=percentile_2016)) +
  xlim(0, 100) +
  ylim(0, 100) +
  geom_abline(intercept = 0, slope = 1, color = "red") +
  xlab("Percentile in 1981") +
  ylab("Percentile in 2016")

meta %>% 
  filter(varname %in% c("percentile_1981", "percentile_2016")) %>%
  mutate(label = paste0(varname, ": ", about)) %>% 
  select(label) %>% 
  as.list()
## $label
## [1] "percentile_1981: Statewide percentile rank in 1981, on a scale of 0-100"
## [2] "percentile_2016: Statewide percentile rank in 2016, on a scale of 0-100"

This scatterplot shows the relationship between a census tract’s percentile rank in 1981 and its percentile rank in 2016. The red line shows where the data would be if their percentiles in 1981 and 2016 were the same.

Spatial Distributions

1981

PM2.5 Concentration

pal <- colorNumeric("Blues", reverse = FALSE, domain = easternshapes$pm2_5_1981)

leaflet(easternshapes) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(data = easternshapes,
              fillColor = ~pal(pm2_5_1981),
              weight = 1,
              opacity = 1,
              color = "white",
              fillOpacity = 0.6,
              highlight = highlightOptions(weight = 2, fillOpacity = 0.8, bringToFront = T),
              popup = paste0("Tract Number: ", easternshapes$GEOID, "<br>",
                             "Concentration: ", easternshapes$pm2_5_1981)) %>%
  addLegend("bottomright", pal = pal, values = easternshapes$pm2_5_1981,
            title = "PM2.5 Concentration, 1981", opacity = 0.7)
meta %>% 
  filter(varname=="pm2_5_1981") %>%
  mutate(label = paste0(varname, ": ", about)) %>% 
  select(label) %>% 
  as.list()
## $label
## [1] "pm2_5_1981: Concentration of PM2.5 in 1981"

Percentile

pal <- colorNumeric("Blues", reverse = FALSE, domain = easternshapes$percentile_1981)

leaflet(easternshapes) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(data = easternshapes,
              fillColor = ~pal(percentile_1981),
              weight = 1,
              opacity = 1,
              color = "white",
              fillOpacity = 0.6,
              highlight = highlightOptions(weight = 2, fillOpacity = 0.8, bringToFront = T),
              popup = paste0("Tract Number: ", easternshapes$GEOID, "<br>",
                             "Percentile: ", round(easternshapes$percentile_1981, 2))) %>%
  addLegend("bottomright", pal = pal, values = easternshapes$percentile_1981,
            title = "Percentile, 1981", opacity = 0.7)
meta %>% 
  filter(varname=="percentile_1981") %>%
  mutate(label = paste0(varname, ": ", about)) %>% 
  select(label) %>% 
  as.list()
## $label
## [1] "percentile_1981: Statewide percentile rank in 1981, on a scale of 0-100"

2016

PM2.5 Concentration

pal <- colorNumeric("Blues", reverse = FALSE, domain = easternshapes$pm2_5_2016)

leaflet(easternshapes) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(data = easternshapes,
              fillColor = ~pal(pm2_5_2016),
              weight = 1,
              opacity = 1,
              color = "white",
              fillOpacity = 0.6,
              highlight = highlightOptions(weight = 2, fillOpacity = 0.8, bringToFront = T),
              popup = paste0("Tract Number: ", easternshapes$GEOID, "<br>",
                             "Concentration: ", easternshapes$pm2_5_2016)) %>%
  addLegend("bottomright", pal = pal, values = easternshapes$pm2_5_2016,
            title = "PM2.5 Concentration, 2016", opacity = 0.7)
meta %>% 
  filter(varname=="pm2_5_2016") %>%
  mutate(label = paste0(varname, ": ", about)) %>% 
  select(label) %>% 
  as.list()
## $label
## [1] "pm2_5_2016: Concentration of PM2.5 in 2016"

Percentile

pal <- colorNumeric("Blues", reverse = FALSE, domain = easternshapes$percentile_2016)

leaflet(easternshapes) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(data = easternshapes,
              fillColor = ~pal(percentile_2016),
              weight = 1,
              opacity = 1,
              color = "white",
              fillOpacity = 0.6,
              highlight = highlightOptions(weight = 2, fillOpacity = 0.8, bringToFront = T),
              popup = paste0("Tract Number: ", easternshapes$GEOID, "<br>",
                             "Percentile: ", round(easternshapes$percentile_2016, 2))) %>%
  addLegend("bottomright", pal = pal, values = easternshapes$percentile_2016,
            title = "Percentile, 2016", opacity = 0.7)
meta %>% 
  filter(varname=="percentile_2016") %>%
  mutate(label = paste0(varname, ": ", about)) %>% 
  select(label) %>% 
  as.list()
## $label
## [1] "percentile_2016: Statewide percentile rank in 2016, on a scale of 0-100"

Change

Change in PM2.5, 1981-2016

pal <- colorNumeric("Blues", reverse = TRUE, domain = easternshapes$pm_change_1981_2016)

leaflet(easternshapes) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(data = easternshapes,
              fillColor = ~pal(pm_change_1981_2016),
              weight = 1,
              opacity = 1,
              color = "white",
              fillOpacity = 0.6,
              highlight = highlightOptions(weight = 2, fillOpacity = 0.8, bringToFront = T),
              popup = paste0("Tract Number: ", easternshapes$GEOID, "<br>",
                             "PM2.5 Change: ", round(easternshapes$pm_change_1981_2016, 2))) %>%
  addLegend("bottomright", pal = pal, values = easternshapes$pm_change_1981_2016,
            title = "Change in PM2.5, 1981-2016", opacity = 0.7)
meta %>% 
  filter(varname=="pm_change_1981_2016") %>%
  mutate(label = paste0(varname, ": ", about)) %>% 
  select(label) %>% 
  as.list()
## $label
## [1] "pm_change_1981_2016: Change in the concentration of PM2.5 from 1981 to 2016 (pm2_5_2016 - pm2_5_1981)"

Percentile Change, 1981-2016

pal <- colorNumeric("Blues", reverse = TRUE, domain = easternshapes$pctile_change_1981_2016)

leaflet(easternshapes) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(data = easternshapes,
              fillColor = ~pal(pctile_change_1981_2016),
              weight = 1,
              opacity = 1,
              color = "white",
              fillOpacity = 0.6,
              highlight = highlightOptions(weight = 2, fillOpacity = 0.8, bringToFront = T),
              popup = paste0("Tract Number: ", easternshapes$GEOID, "<br>",
                             "Percentile Change: ", round(easternshapes$pctile_change_1981_2016, 2))) %>%
  addLegend("bottomright", pal = pal, values = easternshapes$pctile_change_1981_2016,
            title = "Percentile Change, 1981-2016", opacity = 0.7)
meta %>% 
  filter(varname=="pctile_change_1981_2016") %>%
  mutate(label = paste0(varname, ": ", about)) %>% 
  select(label) %>% 
  as.list()
## $label
## [1] "pctile_change_1981_2016: Change in statewide percentile rank from 1981 to 2016 (percentile_2016 - percentile_1981)"

Important Notes

The original data uses 2000 census tracts, since that is roughly the midpoint of their 1981-2016 time frame. To integrate this with other 2010 tract-level data, we interpolated the 2000 tract measures to 2010 tracts using areal interpoloation.